2025-08-09
# ggrepel library ----

# Flag upper-tail outliers of `daily_income` within each world region.
# A row is an outlier when its income exceeds Q3 + 1.5 * IQR of its own
# region (the usual upper box-plot fence). The helper columns `Q3`,
# `IQR_col` and `outliers_above` are kept on the result; rows are sorted
# by descending income and the grouping is dropped at the end.
billionaires_outliers <- billionaires_2020 %>%
  group_by(world_6region) %>%
  mutate(
    Q3 = quantile(daily_income, 0.75),
    IQR_col = IQR(daily_income),
    outliers_above = Q3 + 1.5 * IQR_col
  ) %>%
  # The comparison already yields TRUE/FALSE (and NA for NA input), so no
  # if_else() wrapper is needed.
  mutate(is_outlier = daily_income > outliers_above) %>%
  arrange(desc(daily_income), world_6region) %>%
  ungroup()

# Console output that was fused onto the end of the statement above in the
# original text (presumably from printing world_6region_vec[1]; TODO confirm):
# [1] "middle_east_north_africa"
library(ggrepel)
set.seed(2331) # change and re-plot until you are happy

# Box plot of daily income for a single region, with the names of the
# outlying billionaires placed by ggrepel so that the labels do not overlap.
# Reads `all_oneregion` and `outliers_oneregion` from the calling environment.
p <- ggplot() +
  geom_boxplot(
    data = all_oneregion,
    mapping = aes(y = daily_income, x = 1),
    color = "purple",
    outlier.size = 3
  ) +
  geom_text_repel(
    mapping = aes(
      y = daily_income,
      x = 1,
      label = person,
      size = daily_income
    ),
    data = outliers_oneregion,
    max.overlaps = 123,
    force = 7,
    alpha = 0.7,
    segment.alpha = 0.3,
    segment.size = 0.2
  )

# Y-axis breaks: every 10 million, starting at 1 million and running up to
# the largest income rounded to the nearest million. The upper bound is
# clamped to the start value so that seq() cannot fail with a "wrong sign in
# 'by'" error when every income rounds to less than one million.
y_axis_top <- max(
  10^6,
  round(max(all_oneregion$daily_income) * 10^(-6)) * 10^6
)
y_axis_breaks <- seq(from = 10^6, to = y_axis_top, by = 10^7)

# Same plot with the region name as x-axis title, a narrower label-size
# range, and explicit y-axis breaks and labels.
prettified_01 <- p +
  scale_x_continuous(
    breaks = NULL,
    name = world_6region_vec[1]
  ) +
  scale_size_continuous(
    range = c(3, 5),
    breaks = seq(
      from = round(min(outliers_oneregion$daily_income)),
      to = round(max(outliers_oneregion$daily_income)),
      by = round(max(outliers_oneregion$daily_income) * 0.4)
    )
  ) +
  scale_y_continuous(
    breaks = y_axis_breaks,
    labels = as.character(y_axis_breaks)
  )
prettified_01

if else condition

One of the plots contained no outliers. The if/else condition below handles that case: when a region has no outliers, all billionaire names are plotted instead.
library(ggrepel)
# Flag upper-tail outliers of `daily_income` within each world region.
# A row is an outlier when its income exceeds Q3 + 1.5 * IQR of its own
# region. The helper columns `Q3`, `IQR_col` and `outliers_above` are kept.
billionaires_outliers <- billionaires_2020 %>%
  group_by(world_6region) %>%
  mutate(
    Q3 = quantile(daily_income, 0.75),
    IQR_col = IQR(daily_income),
    outliers_above = Q3 + 1.5 * IQR_col
  ) %>%
  # The comparison already yields TRUE/FALSE (and NA for NA input), so no
  # if_else() wrapper is needed.
  mutate(is_outlier = daily_income > outliers_above) %>%
  arrange(desc(daily_income), world_6region) %>%
  # Drop the grouping so later verbs do not silently operate per region.
  ungroup()

# Region names in alphabetical order; the plotting loop iterates over these.
world_6region_vec <- billionaires_outliers %>%
  distinct(world_6region) %>%
  arrange(world_6region) %>%
  pull(world_6region)

# Outlier threshold per region, sorted by region name so that element i
# belongs to world_6region_vec[i]. The threshold is constant within a region,
# so distinct() leaves one row per region.
outliers_above <- billionaires_outliers %>%
  distinct(world_6region, outliers_above) %>%
  arrange(world_6region) %>%
  pull(outliers_above)
# Draw, print and save one income box plot per world region.
#
# Reads `billionaires_outliers`, `world_6region_vec` and `outliers_above`
# from the calling environment; `outliers_above[i]` is the threshold for
# `world_6region_vec[i]`. For each region:
#   * if the region has outliers, only the outliers are labelled and the
#     threshold is drawn as a dotted line with its value printed next to it;
#   * otherwise every billionaire in the region is labelled.
# Each plot is written to ../my_output_files/outliers_billionaires_<region>.pdf.
for (i in seq_along(world_6region_vec)) {
  all_oneregion <- billionaires_outliers %>%
    filter(world_6region == world_6region_vec[i])
  outliers_oneregion <- all_oneregion %>%
    filter(is_outlier)
  has_outliers <- nrow(outliers_oneregion) > 0

  # Vertical gap between the threshold line and its text label:
  # 5 % of the region's income range.
  y_axis_offset_for_outlier_label <-
    diff(range(all_oneregion$daily_income)) * 0.05

  # Y-axis breaks: every 10 million, from 1 million up to the largest income
  # rounded to the nearest million. The upper bound is clamped to the start
  # value so that seq() cannot fail with a "wrong sign in 'by'" error when
  # every income in the region rounds to less than one million.
  y_axis_top <- max(
    10^6,
    round(max(all_oneregion$daily_income) * 10^(-6)) * 10^6
  )
  y_axis_breaks <- seq(from = 10^6, to = y_axis_top, by = 10^7)

  if (has_outliers) {
    cat(world_6region_vec[i], "\n")
    label_data <- outliers_oneregion
  } else {
    cat(world_6region_vec[i], "has no outliers. I will plot all names.\n")
    label_data <- all_oneregion
  }

  # Seed before every plot (not only the ones with outliers) so that the
  # label placement is reproducible for all regions.
  set.seed(155)

  # Layers shared by both cases. The few per-case settings keep the values
  # each case used before (1.5 is geom_boxplot's default outlier size).
  p <- ggplot() +
    geom_boxplot(
      data = all_oneregion,
      mapping = aes(y = daily_income, x = 1),
      color = "purple",
      outlier.size = if (has_outliers) 3 else 1.5
    ) +
    geom_text_repel(
      mapping = aes(
        y = daily_income,
        x = 1,
        label = person,
        size = daily_income
      ),
      data = label_data,
      max.overlaps = 100,
      force = 7,
      alpha = 0.7,
      segment.alpha = if (has_outliers) 0.3 else 0.2,
      segment.size = if (has_outliers) 0.2 else 0.1
    ) +
    scale_x_continuous(
      breaks = NULL,
      name = world_6region_vec[i]
    ) +
    scale_y_continuous(
      breaks = y_axis_breaks,
      labels = as.character(y_axis_breaks)
    )

  if (has_outliers) {
    # Extras for regions with outliers: larger label sizes, the threshold
    # line with its value, and bigger axis text.
    p <- p +
      scale_size_continuous(
        range = c(6, 10),
        breaks = seq(
          from = round(min(outliers_oneregion$daily_income)),
          to = round(max(outliers_oneregion$daily_income)),
          by = round(max(outliers_oneregion$daily_income) * 0.4)
        )
      ) +
      geom_hline(
        yintercept = outliers_above[i],
        color = "seagreen",
        linewidth = 1,
        linetype = 3,
        alpha = 0.4
      ) +
      annotate(
        geom = "text",
        x = 0.6,
        y = outliers_above[i] + y_axis_offset_for_outlier_label,
        label = outliers_above[i],
        color = "seagreen",
        size = 6
      ) +
      theme(
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14)
      )
  }

  # Print in both cases: the no-outlier message promises a plot as well.
  print(p)
  ggsave(
    plot = p,
    filename = paste0(
      "../my_output_files/outliers_billionaires_",
      world_6region_vec[i], ".pdf"
    ),
    width = if (has_outliers) 7 * 2.2 else 7 * 2
  )
}